# Read data/fl_random_forest.csv into fl_valid_sub and show its first rows.
# Later code in this file reads the pred.*, sr.race and rf.race columns from it.
fl_valid_sub <- read.csv(file = "data/fl_random_forest.csv")
head(fl_valid_sub, n = 6L)


# Add one indicator column per race, pred.<race>.class, to `df`.
#
# df: data frame holding the probability columns pred.whi, pred.bla,
#     pred.his, pred.asi and pred.oth.
#
# For every row the indicator is
#   1  when that race's probability is strictly greater than each of the
#      other four,
#   0  when at least one comparison is FALSE (ties therefore give 0 for
#      every race),
#   NA when the race's own probability is missing, or when a comparison is
#      NA and none is FALSE.
#
# Returns `df` with the five indicator columns appended.
classify <- function(df) {
  eth <- c("whi", "bla", "his", "asi", "oth")

  for (race in eth) {
    own <- df[[paste("pred", race, sep = ".")]]
    rival_cols <- paste("pred", setdiff(eth, race), sep = ".")

    # Element-wise "beats every rival"; `&` keeps R's three-valued logic, so
    # a FALSE comparison wins over an NA one.
    beats_all <- Reduce(`&`, lapply(rival_cols, function(rc) own > df[[rc]]))

    # Value used when the race does not win: 0, or NA if its own
    # probability is missing.
    fallback <- ifelse(is.na(own), NA, 0)

    df[[paste("pred", race, "class", sep = ".")]] <- ifelse(beats_all, 1, fallback)
  }

  df
}

# Keep only the five pred.* probability columns and rename pred.lat to
# pred.his, the name classify() looks up. pred.lat is selected last so the
# renamed column sits at the end of fl_preds.
fl_preds <- fl_valid_sub[, c("pred.whi", "pred.bla", "pred.asi", "pred.oth", "pred.lat"), drop = FALSE]
names(fl_preds)[names(fl_preds) == "pred.lat"] <- "pred.his"

# Add the pred.<race>.class indicator columns.
fl_test <- classify(df = fl_preds)


# Copy the indicator columns produced by classify() onto fl_valid_sub as
# plain numeric columns (one predicted.* column per race).
fl_valid_sub[c("predicted.white", "predicted.black", "predicted.latino",
               "predicted.asian", "predicted.other")] <-
  lapply(
    fl_test[c("pred.whi.class", "pred.bla.class", "pred.his.class",
              "pred.asi.class", "pred.oth.class")],
    as.numeric
  )

# Collapse the indicators into one label. Rows where no indicator equals 1
# (all 0 or NA) keep the empty string. `%in% 1` treats NA as "not 1", which
# matches logical-index assignment skipping NA positions.
fl_valid_sub$predicted.race <- with(
  fl_valid_sub,
  ifelse(predicted.other %in% 1, "Other",
    ifelse(predicted.asian %in% 1, "Asian",
      ifelse(predicted.latino %in% 1, "Hispanic",
        ifelse(predicted.black %in% 1, "Black",
          ifelse(predicted.white %in% 1, "White", "")))))
)

# 0/1 indicators for the random forest label (rf.race). A missing label
# stays NA.
fl_valid_sub$predicted.white.rf <- as.numeric(fl_valid_sub$rf.race == "White")
fl_valid_sub$predicted.black.rf <- as.numeric(fl_valid_sub$rf.race == "Black")
fl_valid_sub$predicted.latino.rf <- as.numeric(fl_valid_sub$rf.race == "Hispanic")
fl_valid_sub$predicted.asian.rf <- as.numeric(fl_valid_sub$rf.race == "Asian")
fl_valid_sub$predicted.other.rf <- as.numeric(fl_valid_sub$rf.race == "Other")

# 0/1 indicators for the self-reported race (sr.race), built the same way.
fl_valid_sub$sr.white <- as.numeric(fl_valid_sub$sr.race == "White")
fl_valid_sub$sr.black <- as.numeric(fl_valid_sub$sr.race == "Black")
fl_valid_sub$sr.hispanic <- as.numeric(fl_valid_sub$sr.race == "Hispanic")
fl_valid_sub$sr.asian <- as.numeric(fl_valid_sub$sr.race == "Asian")
fl_valid_sub$sr.other <- as.numeric(fl_valid_sub$sr.race == "Other")



##############################################
# Misclassification flags: 1 when the self-reported race differs from the
# BISG label (predicted.race) or from the random forest label (rf.race),
# 0 when they agree.
fl_valid_sub$wrong.race <- as.numeric(fl_valid_sub$sr.race != fl_valid_sub$predicted.race)
fl_valid_sub$wrong.race.rf <- as.numeric(fl_valid_sub$sr.race != fl_valid_sub$rf.race)

#TABLE 1 - random forest overall error rate
round(mean(fl_valid_sub$wrong.race.rf, na.rm = TRUE), digits = 3)
#0.142

# yardstick needs factor columns for truth and estimate.
fl_valid_sub[c("rf.race.factor", "sr.race.factor")] <-
  lapply(fl_valid_sub[c("rf.race", "sr.race")], as.factor)
#TABLE 1 - F1-score
library(yardstick)
f_meas(data = fl_valid_sub, truth = sr.race.factor, estimate = rf.race.factor)
#0.630

#TABLE 1 - false negative - i.e.  classifying a latino as non-latino
# Share of each self-reported group that the random forest labels as
# something else.
with(fl_valid_sub, round(mean(wrong.race.rf[sr.race == "White"], na.rm = TRUE), digits = 3))
#0.082
with(fl_valid_sub, round(mean(wrong.race.rf[sr.race == "Black"], na.rm = TRUE), digits = 3))
#0.231
with(fl_valid_sub, round(mean(wrong.race.rf[sr.race == "Hispanic"], na.rm = TRUE), digits = 3))
#0.141
with(fl_valid_sub, round(mean(wrong.race.rf[sr.race == "Asian"], na.rm = TRUE), digits = 3))
#0.476
with(fl_valid_sub, round(mean(wrong.race.rf[sr.race == "Other"], na.rm = TRUE), digits = 3))
#0.949

#TABLE 1 - false positive - i.e.  classifying a non-white as white
# Share of voters outside each self-reported group that the random forest
# assigns to that group.
with(fl_valid_sub, round(mean(predicted.white.rf[sr.race != "White"], na.rm = TRUE), digits = 3))
#0.189
with(fl_valid_sub, round(mean(predicted.black.rf[sr.race != "Black"], na.rm = TRUE), digits = 3))
#0.041
with(fl_valid_sub, round(mean(predicted.latino.rf[sr.race != "Hispanic"], na.rm = TRUE), digits = 3))
#0.035
with(fl_valid_sub, round(mean(predicted.asian.rf[sr.race != "Asian"], na.rm = TRUE), digits = 3))
#0.007
with(fl_valid_sub, round(mean(predicted.other.rf[sr.race != "Other"], na.rm = TRUE), digits = 3))
#0.004


###################
###################
#TABLE A.4 - TABLE 2 BUT FOR FLORIDA IN APPENDIX
###################
###################
#INCOME

#White
# For each group: a = mean median_income among self-reported members,
# b = among BISG-classified members, c = among random-forest-classified
# members. Printed are a, b, the percent difference of b from a, c, and the
# percent difference of c from a.
a <- with(fl_valid_sub, summary(median_income[sr.white == 1])["Mean"])
b <- with(fl_valid_sub, summary(median_income[predicted.white == 1])["Mean"])
c <- with(fl_valid_sub, summary(median_income[predicted.white.rf == 1])["Mean"])

round(a, digits = 0)
round(b, digits = 0)
round((b - a) / a * 100, digits = 2)
round(c, digits = 0)
round((c - a) / a * 100, digits = 2)

#Black
a <- with(fl_valid_sub, summary(median_income[sr.black == 1])["Mean"])
b <- with(fl_valid_sub, summary(median_income[predicted.black == 1])["Mean"])
c <- with(fl_valid_sub, summary(median_income[predicted.black.rf == 1])["Mean"])

round(a, digits = 0)
round(b, digits = 0)
round((b - a) / a * 100, digits = 2)
round(c, digits = 0)
round((c - a) / a * 100, digits = 2)

#Latino
a <- with(fl_valid_sub, summary(median_income[sr.hispanic == 1])["Mean"])
b <- with(fl_valid_sub, summary(median_income[predicted.latino == 1])["Mean"])
c <- with(fl_valid_sub, summary(median_income[predicted.latino.rf == 1])["Mean"])

round(a, digits = 0)
round(b, digits = 0)
round((b - a) / a * 100, digits = 2)
round(c, digits = 0)
round((c - a) / a * 100, digits = 2)

#Asian
a <- with(fl_valid_sub, summary(median_income[sr.asian == 1])["Mean"])
b <- with(fl_valid_sub, summary(median_income[predicted.asian == 1])["Mean"])
c <- with(fl_valid_sub, summary(median_income[predicted.asian.rf == 1])["Mean"])

round(a, digits = 0)
round(b, digits = 0)
round((b - a) / a * 100, digits = 2)
round(c, digits = 0)
round((c - a) / a * 100, digits = 2)


#####################################################################
#housing value
#White
# Same layout as the income comparison, for median.house.price:
# a = self-reported group mean, b = BISG group mean, c = random forest
# group mean, each followed by its percent difference from a.
a <- with(fl_valid_sub, summary(median.house.price[sr.white == 1])["Mean"])
b <- with(fl_valid_sub, summary(median.house.price[predicted.white == 1])["Mean"])
c <- with(fl_valid_sub, summary(median.house.price[predicted.white.rf == 1])["Mean"])

round(a, digits = 0)
round(b, digits = 0)
round((b - a) / a * 100, digits = 2)
round(c, digits = 0)
round((c - a) / a * 100, digits = 2)

#Black
a <- with(fl_valid_sub, summary(median.house.price[sr.black == 1])["Mean"])
b <- with(fl_valid_sub, summary(median.house.price[predicted.black == 1])["Mean"])
c <- with(fl_valid_sub, summary(median.house.price[predicted.black.rf == 1])["Mean"])

round(a, digits = 0)
round(b, digits = 0)
round((b - a) / a * 100, digits = 2)
round(c, digits = 0)
round((c - a) / a * 100, digits = 2)

#Latino
a <- with(fl_valid_sub, summary(median.house.price[sr.hispanic == 1])["Mean"])
b <- with(fl_valid_sub, summary(median.house.price[predicted.latino == 1])["Mean"])
c <- with(fl_valid_sub, summary(median.house.price[predicted.latino.rf == 1])["Mean"])

round(a, digits = 0)
round(b, digits = 0)
round((b - a) / a * 100, digits = 2)
round(c, digits = 0)
round((c - a) / a * 100, digits = 2)

#Asian
a <- with(fl_valid_sub, summary(median.house.price[sr.asian == 1])["Mean"])
b <- with(fl_valid_sub, summary(median.house.price[predicted.asian == 1])["Mean"])
c <- with(fl_valid_sub, summary(median.house.price[predicted.asian.rf == 1])["Mean"])

round(a, digits = 0)
round(b, digits = 0)
round((b - a) / a * 100, digits = 2)
round(c, digits = 0)
round((c - a) / a * 100, digits = 2)

#######################################
#donors
# Percent of donors (donor == 1) falling in each race group, using the
# self-reported race, the BISG label and the random forest label in turn.
# The denominator is the total number of donors.
donor.actual <- tapply(
  X = fl_valid_sub$donor[fl_valid_sub$donor == 1],
  INDEX = fl_valid_sub$sr.race[fl_valid_sub$donor == 1],
  length
) / sum(fl_valid_sub$donor) * 100
donor.predicted <- tapply(
  X = fl_valid_sub$donor[fl_valid_sub$donor == 1],
  INDEX = fl_valid_sub$predicted.race[fl_valid_sub$donor == 1],
  length
) / sum(fl_valid_sub$donor) * 100
donor.predicted.rf <- tapply(
  X = fl_valid_sub$donor[fl_valid_sub$donor == 1],
  INDEX = fl_valid_sub$rf.race[fl_valid_sub$donor == 1],
  length
) / sum(fl_valid_sub$donor) * 100

# Select the table columns by group name rather than by position. Positions
# depend on which levels are present (e.g. whether predicted.race contains
# the "" level for unclassified rows), so a positional index can silently
# report the wrong group. A group that is absent now shows up as NA.
race.order <- c("White", "Black", "Hispanic", "Asian")

round(donor.actual[race.order], 2)
# White    Black    Hispanic    Asian 
# 79.02     6.14    11.67     1.47 
round(donor.predicted[race.order], 2)
# White    Black    Hispanic    Asian 
# 81.35     4.22    12.83     1.50 
round((donor.predicted[race.order] - donor.actual[race.order]) / donor.actual[race.order] * 100, 2)
# White    Black    Hispanic    Asian 
# 2.95   -31.34     9.90     2.57 

round(donor.predicted.rf[race.order], 2)
# White    Black    Hispanic    Asian 
# 79.60     6.34    12.41     1.42 
round((donor.predicted.rf[race.order] - donor.actual[race.order]) / donor.actual[race.order] * 100, 2)
# White    Black    Hispanic    Asian 
# 0.73     3.30     6.31    -3.22 


##############################################
#TURNOUT
#overall turnout rate

#CVAP NUMBERS FOR TURNOUT DENOMINATOR
# Read data/CVAP_Tract.csv and split it by the lntitle category.
cvap <- read.csv(file = "data/CVAP_Tract.csv")
# GEO_ID is geoid from its 8th character onward; the turnout code below
# matches on its first two characters.
cvap$GEO_ID <- substring(cvap$geoid, first = 8)
cvap.white <- cvap[which(cvap$lntitle == "White Alone"), ]
cvap.black <- cvap[which(cvap$lntitle == "Black or African American Alone"), ]
cvap.latino <- cvap[which(cvap$lntitle == "Hispanic or Latino"), ]
cvap.asian <- cvap[which(cvap$lntitle == "Asian Alone"), ]
cvap.total <- cvap[which(cvap$lntitle == "Total"), ]

# Turnout rate per group = 100 * (sum of voted.16g in the group) /
# (0.20 * group CVAP). The CVAP total sums cvap_est over rows whose GEO_ID
# begins with "12". It is scaled by 0.20 because the voter file used here is
# a 20% sample of the Florida voter file. Each group is computed three
# times: self-reported race (sr), BISG (bisg) and BISG + random forest (rf).

#cvap WHITE voters
white.voters.cvap <- with(cvap.white, sum(cvap_est[substr(GEO_ID, 1, 2) == "12"]))
#number of WHITE voters, self-reported
white.voters.sr <- with(fl_valid_sub, sum(voted.16g[sr.white == 1]))
overall.sr.white <- 100 * white.voters.sr / (0.20 * white.voters.cvap)
#number of WHITE voters, BISG
white.voters.bisg <- with(fl_valid_sub, sum(voted.16g[predicted.white == 1]))
overall.bisg.white <- 100 * white.voters.bisg / (0.20 * white.voters.cvap)
#number of WHITE voters, BISG+RF
white.voters.rf <- with(fl_valid_sub, sum(voted.16g[predicted.white.rf == 1]))
overall.rf.white <- 100 * white.voters.rf / (0.20 * white.voters.cvap)

#cvap BLACK voters
black.voters.cvap <- with(cvap.black, sum(cvap_est[substr(GEO_ID, 1, 2) == "12"]))
#number of BLACK voters, self-reported
black.voters.sr <- with(fl_valid_sub, sum(voted.16g[sr.black == 1]))
overall.sr.black <- 100 * black.voters.sr / (0.20 * black.voters.cvap)
#number of BLACK voters, BISG
black.voters.bisg <- with(fl_valid_sub, sum(voted.16g[predicted.black == 1]))
overall.bisg.black <- 100 * black.voters.bisg / (0.20 * black.voters.cvap)
#number of BLACK voters, BISG+RF
black.voters.rf <- with(fl_valid_sub, sum(voted.16g[predicted.black.rf == 1]))
overall.rf.black <- 100 * black.voters.rf / (0.20 * black.voters.cvap)

#cvap latino voters
latino.voters.cvap <- with(cvap.latino, sum(cvap_est[substr(GEO_ID, 1, 2) == "12"]))
#number of latino voters, self-reported
latino.voters.sr <- with(fl_valid_sub, sum(voted.16g[sr.hispanic == 1]))
overall.sr.latino <- 100 * latino.voters.sr / (0.20 * latino.voters.cvap)
#number of latino voters, BISG
latino.voters.bisg <- with(fl_valid_sub, sum(voted.16g[predicted.latino == 1]))
overall.bisg.latino <- 100 * latino.voters.bisg / (0.20 * latino.voters.cvap)
#number of latino voters, BISG+RF
latino.voters.rf <- with(fl_valid_sub, sum(voted.16g[predicted.latino.rf == 1]))
overall.rf.latino <- 100 * latino.voters.rf / (0.20 * latino.voters.cvap)

#cvap asian voters
asian.voters.cvap <- with(cvap.asian, sum(cvap_est[substr(GEO_ID, 1, 2) == "12"]))
#number of asian voters, self-reported
asian.voters.sr <- with(fl_valid_sub, sum(voted.16g[sr.asian == 1]))
overall.sr.asian <- 100 * asian.voters.sr / (0.20 * asian.voters.cvap)
#number of asian voters, BISG
asian.voters.bisg <- with(fl_valid_sub, sum(voted.16g[predicted.asian == 1]))
overall.bisg.asian <- 100 * asian.voters.bisg / (0.20 * asian.voters.cvap)
#number of asian voters, BISG+RF
asian.voters.rf <- with(fl_valid_sub, sum(voted.16g[predicted.asian.rf == 1]))
overall.rf.asian <- 100 * asian.voters.rf / (0.20 * asian.voters.cvap)

# Printed per group: self-reported rate, BISG rate, percent difference of
# BISG from self-reported, random forest rate, percent difference of random
# forest from self-reported.
round(overall.sr.white, digits = 2)
round(overall.bisg.white, digits = 2)
round(100 * (overall.bisg.white - overall.sr.white) / overall.sr.white, digits = 2)
round(overall.rf.white, digits = 2)
round(100 * (overall.rf.white - overall.sr.white) / overall.sr.white, digits = 2)

round(overall.sr.black, digits = 2)
round(overall.bisg.black, digits = 2)
round(100 * (overall.bisg.black - overall.sr.black) / overall.sr.black, digits = 2)
round(overall.rf.black, digits = 2)
round(100 * (overall.rf.black - overall.sr.black) / overall.sr.black, digits = 2)

round(overall.sr.latino, digits = 2)
round(overall.bisg.latino, digits = 2)
round(100 * (overall.bisg.latino - overall.sr.latino) / overall.sr.latino, digits = 2)
round(overall.rf.latino, digits = 2)
round(100 * (overall.rf.latino - overall.sr.latino) / overall.sr.latino, digits = 2)

round(overall.sr.asian, digits = 2)
round(overall.bisg.asian, digits = 2)
round(100 * (overall.bisg.asian - overall.sr.asian) / overall.sr.asian, digits = 2)
round(overall.rf.asian, digits = 2)
round(100 * (overall.rf.asian - overall.sr.asian) / overall.sr.asian, digits = 2)

####TRACT DIVERSITY #########################################################
# Label each row with the group whose cvap.pct.* value is strictly larger
# than the other three. Rows with a tie for the largest share, or where the
# comparison is NA, keep the empty string.
fl_valid_sub$tract.largest.race <- with(fl_valid_sub, {
  largest <- rep("", length(cvap.pct.white))
  largest[which(cvap.pct.white > cvap.pct.black &
                  cvap.pct.white > cvap.pct.latino &
                  cvap.pct.white > cvap.pct.asian)] <- "White"
  largest[which(cvap.pct.black > cvap.pct.white &
                  cvap.pct.black > cvap.pct.latino &
                  cvap.pct.black > cvap.pct.asian)] <- "Black"
  largest[which(cvap.pct.latino > cvap.pct.white &
                  cvap.pct.latino > cvap.pct.black &
                  cvap.pct.latino > cvap.pct.asian)] <- "Hispanic"
  largest[which(cvap.pct.asian > cvap.pct.white &
                  cvap.pct.asian > cvap.pct.black &
                  cvap.pct.asian > cvap.pct.latino)] <- "Asian"
  largest
})

# local.minority* = 1 when the voter's (self-reported / BISG / random
# forest) race is White, Black, Hispanic or Asian and is not the tract's
# largest group; 0 otherwise. `%in% 1` counts a missing indicator as "not in
# the group", so those rows stay 0.
fl_valid_sub$local.minority <- with(fl_valid_sub, as.numeric(
  (sr.white %in% 1 & tract.largest.race != "White") |
    (sr.black %in% 1 & tract.largest.race != "Black") |
    (sr.hispanic %in% 1 & tract.largest.race != "Hispanic") |
    (sr.asian %in% 1 & tract.largest.race != "Asian")
))

fl_valid_sub$local.minority.bisg <- with(fl_valid_sub, as.numeric(
  (predicted.white %in% 1 & tract.largest.race != "White") |
    (predicted.black %in% 1 & tract.largest.race != "Black") |
    (predicted.latino %in% 1 & tract.largest.race != "Hispanic") |
    (predicted.asian %in% 1 & tract.largest.race != "Asian")
))

fl_valid_sub$local.minority.rf <- with(fl_valid_sub, as.numeric(
  (predicted.white.rf %in% 1 & tract.largest.race != "White") |
    (predicted.black.rf %in% 1 & tract.largest.race != "Black") |
    (predicted.latino.rf %in% 1 & tract.largest.race != "Hispanic") |
    (predicted.asian.rf %in% 1 & tract.largest.race != "Asian")
))

# Percent of each group flagged as a local minority, grouping by the
# self-reported race, the BISG label and the random forest label in turn.
minority.actual <- tapply(X = fl_valid_sub$local.minority, INDEX = fl_valid_sub$sr.race, mean) * 100
minority.predicted <- tapply(X = fl_valid_sub$local.minority.bisg, INDEX = fl_valid_sub$predicted.race, mean) * 100
minority.predicted.rf <- tapply(X = fl_valid_sub$local.minority.rf, INDEX = fl_valid_sub$rf.race, mean) * 100

# Select the table columns by group name rather than by position. Positions
# depend on which levels are present (e.g. whether predicted.race contains
# the "" level for unclassified rows), so a positional index can silently
# report the wrong group. A group that is absent now shows up as NA.
race.order <- c("White", "Black", "Hispanic", "Asian")

round(minority.actual[race.order], 2)
# White    Black    Hispanic    Asian 
# 7.11    59.46    51.60   100.00
round(minority.predicted[race.order], 2)
# White    Black    Hispanic    Asian 
#  6.00    36.27    50.48   100.00 
round((minority.predicted[race.order] - minority.actual[race.order]) / minority.actual[race.order] * 100, 2)
# White    Black    Hispanic    Asian 
# -15.66   -38.99    -2.15     0.00 

round(minority.predicted.rf[race.order], 2)
# White    Black    Hispanic    Asian 
# 5.64    56.14    50.30   100.00 
round((minority.predicted.rf[race.order] - minority.actual[race.order]) / minority.actual[race.order] * 100, 2)
# White    Black    Hispanic    Asian 
# -20.67    -5.59    -2.52     0.00 


# florida$income_rounded <- as.numeric(florida$income_rounded)
# florida$college_rounded <- as.numeric(florida$college_rounded)
# florida$homeowner_rounded <- as.numeric(florida$homeowner_rounded)
# florida$vote_rounded <- as.numeric(florida$vote_rounded)
# florida$log_donations_rounded <- as.numeric(florida$log_donations_rounded)


##################################################
##### FIGURE A.12 - FIGURE 2 but for FLORIDA #####
##################################################
# Bin the x-axis variables used by the vote and donation figures:
# pred.vote to 2 decimals, log(donations.per.cap + 1) to 1 decimal.
fl_valid_sub$vote_rounded <- round(fl_valid_sub$pred.vote, digits = 2)
fl_valid_sub$log_donations_rounded <- round(log(fl_valid_sub$donations.per.cap + 1), digits = 1)

# One data frame per self-reported race; rows with a missing sr.race are
# dropped.
fl_black <- fl_valid_sub[which(fl_valid_sub$sr.race == "Black"), ]
fl_white <- fl_valid_sub[which(fl_valid_sub$sr.race == "White"), ]
fl_latino <- fl_valid_sub[which(fl_valid_sub$sr.race == "Hispanic"), ]
fl_asian <- fl_valid_sub[which(fl_valid_sub$sr.race == "Asian"), ]

#INCOME
# One panel per self-reported race. Each panel:
#   1. averages wrong.race (BISG) and wrong.race.rf (BISG + random forest)
#      within each income_rounded value,
#   2. fits a loess curve through those averages, weighted by the number of
#      voters at each income value,
#   3. draws an empty frame (cex = 0 hides the points) and overlays the two
#      fitted curves.
# tapply() keeps the income values only as character names, so they are
# converted with as.numeric() before being handed to loess() and plot()
# rather than relying on implicit character-to-number coercion.

#BLACK
black.wrong.race <- tapply(X = fl_black$wrong.race, INDEX = fl_black$income_rounded, function(x) mean(x, na.rm = TRUE))
black.wrong.race.forest <- tapply(X = fl_black$wrong.race.rf, INDEX = fl_black$income_rounded, function(x) mean(x, na.rm = TRUE))

point.size <- tapply(X = fl_black$wrong.race, INDEX = fl_black$income_rounded, length)
m <- loess(black.wrong.race ~ as.numeric(names(black.wrong.race)), weights = point.size, span = .6)
m.forest <- loess(black.wrong.race.forest ~ as.numeric(names(black.wrong.race.forest)), weights = point.size, span = .6)

plot(as.numeric(names(black.wrong.race)), black.wrong.race, ylim = c(0, 1),
     xlab = "Median Income of Census Tract ($1,000)", main = "Black Voters",
     ylab = "Misclassification Rate", pch = 16, cex = 0, col = "#99999999",
     axes = FALSE, xlim = c(0, 220000))
axis(side = 1, labels = seq(0, 220, 20), at = seq(0, 220000, 20000), cex.axis = .9)
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
lines(x = m.forest$x, y = m.forest$fitted, lwd = 5, col = "dark green", lty = 2)
box()
text(80000, .85, "BISG", pos = 4)
text(80000, .4, "BISG + Random Forest", pos = 4)

#LATINO
latino.wrong.race <- tapply(X = fl_latino$wrong.race, INDEX = fl_latino$income_rounded, function(x) mean(x, na.rm = TRUE))
latino.wrong.race.forest <- tapply(X = fl_latino$wrong.race.rf, INDEX = fl_latino$income_rounded, function(x) mean(x, na.rm = TRUE))

point.size <- tapply(X = fl_latino$wrong.race, INDEX = fl_latino$income_rounded, length)
m <- loess(latino.wrong.race ~ as.numeric(names(latino.wrong.race)), weights = point.size, span = .6)
m.forest <- loess(latino.wrong.race.forest ~ as.numeric(names(latino.wrong.race.forest)), weights = point.size, span = .6)

plot(as.numeric(names(latino.wrong.race)), latino.wrong.race, ylim = c(0, 1),
     xlab = "Median Income of Census Tract ($1,000)", main = "Hispanic Voters",
     ylab = "Misclassification Rate", pch = 16, cex = 0, col = "#99999999",
     axes = FALSE, xlim = c(0, 220000))
axis(side = 1, labels = seq(0, 220, 20), at = seq(0, 220000, 20000), cex.axis = .9)
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
lines(x = m.forest$x, y = m.forest$fitted, lwd = 5, col = "dark green", lty = 2)
box()

#ASIAN
asian.wrong.race <- tapply(X = fl_asian$wrong.race, INDEX = fl_asian$income_rounded, function(x) mean(x, na.rm = TRUE))
asian.wrong.race.forest <- tapply(X = fl_asian$wrong.race.rf, INDEX = fl_asian$income_rounded, function(x) mean(x, na.rm = TRUE))

point.size <- tapply(X = fl_asian$wrong.race, INDEX = fl_asian$income_rounded, length)
m <- loess(asian.wrong.race ~ as.numeric(names(asian.wrong.race)), weights = point.size, span = .6)
m.forest <- loess(asian.wrong.race.forest ~ as.numeric(names(asian.wrong.race.forest)), weights = point.size, span = .6)

plot(as.numeric(names(asian.wrong.race)), asian.wrong.race, ylim = c(0, 1),
     xlab = "Median Income of Census Tract ($1,000)", main = "Asian Voters",
     ylab = "Misclassification rate", pch = 16, cex = 0, col = "#99999999",
     axes = FALSE, xlim = c(0, 220000))
axis(side = 1, labels = seq(0, 220, 20), at = seq(0, 220000, 20000), cex.axis = .9)
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
lines(x = m.forest$x, y = m.forest$fitted, lwd = 5, col = "dark green", lty = 2)
box()

#WHITE
white.wrong.race <- tapply(X = fl_white$wrong.race, INDEX = fl_white$income_rounded, function(x) mean(x, na.rm = TRUE))
white.wrong.race.forest <- tapply(X = fl_white$wrong.race.rf, INDEX = fl_white$income_rounded, function(x) mean(x, na.rm = TRUE))

point.size <- tapply(X = fl_white$wrong.race, INDEX = fl_white$income_rounded, length)
m <- loess(white.wrong.race ~ as.numeric(names(white.wrong.race)), weights = point.size, span = .6)
m.forest <- loess(white.wrong.race.forest ~ as.numeric(names(white.wrong.race.forest)), weights = point.size, span = .6)

plot(as.numeric(names(white.wrong.race)), white.wrong.race, ylim = c(0, 1),
     xlab = "Median Income of Census Tract ($1,000)", main = "White Voters",
     ylab = "Misclassification Rate", pch = 16, cex = 0, col = "#99999999",
     axes = FALSE, xlim = c(0, 220000))
axis(side = 1, labels = seq(0, 220, 20), at = seq(0, 220000, 20000), cex.axis = .9)
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
lines(x = m.forest$x, y = m.forest$fitted, lwd = 5, col = "dark green", lty = 2)
box()




###### FIGURE A.11 (grid)
#wrong prediction and EDUCATION with all/rf data
# One panel per self-reported race: mean wrong.race (BISG) and
# wrong.race.rf (BISG + random forest) within each college_rounded value,
# smoothed with loess weighted by the number of voters at that value, drawn
# over an empty frame (cex = 0 hides the points).
# tapply() keeps the college_rounded values only as character names, so they
# are converted with as.numeric() before being handed to loess() and plot()
# rather than relying on implicit character-to-number coercion.

#BLACK
black.wrong.race <- tapply(X = fl_black$wrong.race, INDEX = fl_black$college_rounded, function(x) mean(x, na.rm = TRUE))
black.wrong.race.forest <- tapply(X = fl_black$wrong.race.rf, INDEX = fl_black$college_rounded, function(x) mean(x, na.rm = TRUE))

point.size <- tapply(X = fl_black$wrong.race, INDEX = fl_black$college_rounded, length)
m <- loess(black.wrong.race ~ as.numeric(names(black.wrong.race)), weights = point.size, span = .6)
m.forest <- loess(black.wrong.race.forest ~ as.numeric(names(black.wrong.race.forest)), weights = point.size, span = .6)

# The frame is set up from black.wrong.race, like every other panel; the
# points are invisible, so only the axes depend on this call.
plot(as.numeric(names(black.wrong.race)), black.wrong.race, ylim = c(0, 1),
     xlab = "Percent of Census Tract with College Degree",
     main = "Proportion of Race Misclassification - Black Voters",
     ylab = "Proportion", pch = 16, cex = 0, col = "#99999999",
     axes = FALSE, xlim = c(0, 100))
axis(side = 1, labels = seq(0, 100, 10), at = seq(0, 100, 10))
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
lines(x = m.forest$x, y = m.forest$fitted, lwd = 2, col = "dark green", lty = 3)
box()
text(30, .8, "BISG", pos = 4, cex = 2)
text(35, .3, "BISG + Random Forest", pos = 4, cex = 2)

#LATINO
latino.wrong.race <- tapply(X = fl_latino$wrong.race, INDEX = fl_latino$college_rounded, function(x) mean(x, na.rm = TRUE))
latino.wrong.race.forest <- tapply(X = fl_latino$wrong.race.rf, INDEX = fl_latino$college_rounded, function(x) mean(x, na.rm = TRUE))

point.size <- tapply(X = fl_latino$wrong.race, INDEX = fl_latino$college_rounded, length)
m <- loess(latino.wrong.race ~ as.numeric(names(latino.wrong.race)), weights = point.size, span = .6)
m.forest <- loess(latino.wrong.race.forest ~ as.numeric(names(latino.wrong.race.forest)), weights = point.size, span = .6)

plot(as.numeric(names(latino.wrong.race)), latino.wrong.race, ylim = c(0, 1),
     xlab = "Percent of Census Tract with College Degree",
     main = "Proportion of Race Misclassification - Latino Voters",
     ylab = "Proportion", pch = 16, cex = 0, col = "#99999999",
     axes = FALSE, xlim = c(0, 100))
axis(side = 1, labels = seq(0, 100, 10), at = seq(0, 100, 10))
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
#lines(x=m.logit$x, y=m.logit$fitted, lwd = 2, col = "dark blue", lty = 2)
lines(x = m.forest$x, y = m.forest$fitted, lwd = 2, col = "dark green", lty = 3)
box()

#ASIAN
asian.wrong.race <- tapply(X = fl_asian$wrong.race, INDEX = fl_asian$college_rounded, function(x) mean(x, na.rm = TRUE))
asian.wrong.race.forest <- tapply(X = fl_asian$wrong.race.rf, INDEX = fl_asian$college_rounded, function(x) mean(x, na.rm = TRUE))

point.size <- tapply(X = fl_asian$wrong.race, INDEX = fl_asian$college_rounded, length)
m <- loess(asian.wrong.race ~ as.numeric(names(asian.wrong.race)), weights = point.size, span = .6)
m.forest <- loess(asian.wrong.race.forest ~ as.numeric(names(asian.wrong.race.forest)), weights = point.size, span = .6)

plot(as.numeric(names(asian.wrong.race)), asian.wrong.race, ylim = c(0, 1),
     xlab = "Percent of Census Tract with College Degree",
     main = "Proportion of Race Misclassification - Asian Voters",
     ylab = "Proportion", pch = 16, cex = 0, col = "#99999999",
     axes = FALSE, xlim = c(0, 100))
axis(side = 1, labels = seq(0, 100, 10), at = seq(0, 100, 10))
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
lines(x = m.forest$x, y = m.forest$fitted, lwd = 2, col = "dark green", lty = 3)
box()

#WHITE
white.wrong.race <- tapply(X = fl_white$wrong.race, INDEX = fl_white$college_rounded, function(x) mean(x, na.rm = TRUE))
white.wrong.race.forest <- tapply(X = fl_white$wrong.race.rf, INDEX = fl_white$college_rounded, function(x) mean(x, na.rm = TRUE))

point.size <- tapply(X = fl_white$wrong.race, INDEX = fl_white$college_rounded, length)
m <- loess(white.wrong.race ~ as.numeric(names(white.wrong.race)), weights = point.size, span = .6)
m.forest <- loess(white.wrong.race.forest ~ as.numeric(names(white.wrong.race.forest)), weights = point.size, span = .6)

plot(as.numeric(names(white.wrong.race)), white.wrong.race, ylim = c(0, 1),
     xlab = "Percent of Census Tract with College Degree",
     main = "Proportion of Race Misclassification - White Voters",
     ylab = "Proportion", pch = 16, cex = 0, col = "#99999999",
     axes = FALSE, xlim = c(0, 100))
axis(side = 1, labels = seq(0, 100, 10), at = seq(0, 100, 10))
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
lines(x = m.forest$x, y = m.forest$fitted, lwd = 2, col = "dark green", lty = 3)
box()


#wrong prediction and HOME OWNERSHIP with all/rf data
# One panel per self-reported race: mean wrong.race (BISG) and
# wrong.race.rf (BISG + random forest) within each homeowner_rounded value,
# smoothed with loess weighted by the number of voters at that value, drawn
# over an empty frame (cex = 0 hides the points).
# tapply() keeps the homeowner_rounded values only as character names, so
# they are converted with as.numeric() before being handed to loess() and
# plot() rather than relying on implicit character-to-number coercion.

#BLACK
black.wrong.race <- tapply(X = fl_black$wrong.race, INDEX = fl_black$homeowner_rounded, function(x) mean(x, na.rm = TRUE))
black.wrong.race.forest <- tapply(X = fl_black$wrong.race.rf, INDEX = fl_black$homeowner_rounded, function(x) mean(x, na.rm = TRUE))

point.size <- tapply(X = fl_black$wrong.race, INDEX = fl_black$homeowner_rounded, length)
m <- loess(black.wrong.race ~ as.numeric(names(black.wrong.race)), weights = point.size, span = .6)
m.forest <- loess(black.wrong.race.forest ~ as.numeric(names(black.wrong.race.forest)), weights = point.size, span = .6)

# The frame is set up from black.wrong.race, like every other panel; the
# points are invisible, so only the axes depend on this call.
plot(as.numeric(names(black.wrong.race)), black.wrong.race, ylim = c(0, 1),
     xlab = "Percent of Census Tract who are Homeowners",
     main = "Proportion of Race Misclassification - Black Voters",
     ylab = "Proportion", pch = 16, cex = 0, col = "#99999999",
     axes = FALSE, xlim = c(0, 100))
axis(side = 1, labels = seq(0, 100, 10), at = seq(0, 100, 10))
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
lines(x = m.forest$x, y = m.forest$fitted, lwd = 2, col = "dark green", lty = 3)
box()
text(65, .7, "BISG", pos = 4, cex = 2)
text(55, .1, "BISG +\n Random Forest", pos = 4, cex = 2)

#LATINO
latino.wrong.race <- tapply(X = fl_latino$wrong.race, INDEX = fl_latino$homeowner_rounded, function(x) mean(x, na.rm = TRUE))
latino.wrong.race.forest <- tapply(X = fl_latino$wrong.race.rf, INDEX = fl_latino$homeowner_rounded, function(x) mean(x, na.rm = TRUE))

point.size <- tapply(X = fl_latino$wrong.race, INDEX = fl_latino$homeowner_rounded, length)
m <- loess(latino.wrong.race ~ as.numeric(names(latino.wrong.race)), weights = point.size, span = .6)
m.forest <- loess(latino.wrong.race.forest ~ as.numeric(names(latino.wrong.race.forest)), weights = point.size, span = .6)

plot(as.numeric(names(latino.wrong.race)), latino.wrong.race, ylim = c(0, 1),
     xlab = "Percent of Census Tract who are Homeowners",
     main = "Proportion of Race Misclassification - Latino Voters",
     ylab = "Proportion", pch = 16, cex = 0, col = "#99999999",
     axes = FALSE, xlim = c(0, 100))
axis(side = 1, labels = seq(0, 100, 10), at = seq(0, 100, 10))
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
lines(x = m.forest$x, y = m.forest$fitted, lwd = 2, col = "dark green", lty = 3)
box()

#ASIAN
asian.wrong.race <- tapply(X = fl_asian$wrong.race, INDEX = fl_asian$homeowner_rounded, function(x) mean(x, na.rm = TRUE))
asian.wrong.race.forest <- tapply(X = fl_asian$wrong.race.rf, INDEX = fl_asian$homeowner_rounded, function(x) mean(x, na.rm = TRUE))

point.size <- tapply(X = fl_asian$wrong.race, INDEX = fl_asian$homeowner_rounded, length)
m <- loess(asian.wrong.race ~ as.numeric(names(asian.wrong.race)), weights = point.size, span = .6)
m.forest <- loess(asian.wrong.race.forest ~ as.numeric(names(asian.wrong.race.forest)), weights = point.size, span = .6)

plot(as.numeric(names(asian.wrong.race)), asian.wrong.race, ylim = c(0, 1),
     xlab = "Percent of Census Tract who are Homeowners",
     main = "Proportion of Race Misclassification - Asian Voters",
     ylab = "Proportion", pch = 16, cex = 0, col = "#99999999",
     axes = FALSE, xlim = c(0, 100))
axis(side = 1, labels = seq(0, 100, 10), at = seq(0, 100, 10))
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
lines(x = m.forest$x, y = m.forest$fitted, lwd = 2, col = "dark green", lty = 3)
box()

#WHITE
white.wrong.race <- tapply(X = fl_white$wrong.race, INDEX = fl_white$homeowner_rounded, function(x) mean(x, na.rm = TRUE))
white.wrong.race.forest <- tapply(X = fl_white$wrong.race.rf, INDEX = fl_white$homeowner_rounded, function(x) mean(x, na.rm = TRUE))

point.size <- tapply(X = fl_white$wrong.race, INDEX = fl_white$homeowner_rounded, length)
m <- loess(white.wrong.race ~ as.numeric(names(white.wrong.race)), weights = point.size, span = .6)
m.forest <- loess(white.wrong.race.forest ~ as.numeric(names(white.wrong.race.forest)), weights = point.size, span = .6)

plot(as.numeric(names(white.wrong.race)), white.wrong.race, ylim = c(0, 1),
     xlab = "Percent of Census Tract who are Homeowners",
     main = "Proportion of Race Misclassification - White Voters",
     ylab = "Proportion", pch = 16, cex = 0, col = "#99999999",
     axes = FALSE, xlim = c(0, 100))
axis(side = 1, labels = seq(0, 100, 10), at = seq(0, 100, 10))
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
lines(x = m.forest$x, y = m.forest$fitted, lwd = 2, col = "dark green", lty = 3)
box()

#wrong prediction and VOTE PROPENSITY with all/rf data
#BLACK
black.wrong.race <- tapply(X = fl_black$wrong.race, INDEX = fl_black$vote_rounded, function(x) mean(x, na.rm = T))
black.wrong.race.forest <- tapply(X = fl_black$wrong.race.rf, INDEX = fl_black$vote_rounded, function(x) mean(x, na.rm = T))

point.size <- tapply(X = fl_black$wrong.race, INDEX = fl_black$vote_rounded, length)
m <- loess(black.wrong.race ~ names(black.wrong.race), weights = point.size, span = .6)
m.forest <- loess(black.wrong.race.forest ~ names(black.wrong.race.forest), weights = point.size, span = .6)

plot(names(black.wrong.race), black.wrong.race.forest, ylim = c(0, 1), xlab = "Predicted Probability of Voting in 2016", main = "Proportion of Race Misclassification - Black Voters", ylab = "Proportion", pch = 16, cex = 0, col = "#99999999", axes = F, xlim = c(0, 1))
axis(side = 1, labels = seq(0, 1, .1), at = seq(0, 1, .1))
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
lines(x=m$x, y=m$fitted, lwd = 2, col = "dark red")
lines(x=m.forest$x, y=m.forest$fitted, lwd = 2, col = "dark green", lty = 3)
box()
text(.5, .5, "BISG", pos = 4, cex = 2)
text(.5, .12, "BISG +\n Random Forest", pos = 4, cex = 2)

# LATINO
# Mean misclassification indicator per rounded vote-propensity bin, for the
# baseline prediction (wrong.race) and the random-forest prediction
# (wrong.race.rf). NAs are dropped within each bin.
latino.wrong.race <- tapply(X = fl_latino$wrong.race, INDEX = fl_latino$vote_rounded,
                            function(x) mean(x, na.rm = TRUE))
latino.wrong.race.forest <- tapply(X = fl_latino$wrong.race.rf, INDEX = fl_latino$vote_rounded,
                                   function(x) mean(x, na.rm = TRUE))

# Number of fl_latino rows in each bin; used as the loess weights.
point.size <- tapply(X = fl_latino$wrong.race, INDEX = fl_latino$vote_rounded, length)

# names() of a tapply() result are the bin values as character strings;
# convert them to numeric explicitly rather than relying on loess() coercing
# a character predictor internally.
m <- loess(latino.wrong.race ~ as.numeric(names(latino.wrong.race)),
           weights = point.size, span = .6)
m.forest <- loess(latino.wrong.race.forest ~ as.numeric(names(latino.wrong.race.forest)),
                  weights = point.size, span = .6)

# Set up an empty canvas (cex = 0 hides the points, axes are drawn by hand);
# only the two smoothed curves are shown.
plot(as.numeric(names(latino.wrong.race)), latino.wrong.race,
     ylim = c(0, 1), xlim = c(0, 1),
     xlab = "Predicted Probability of Voting in 2016",
     main = "Proportion of Race Misclassification - Latino Voters",
     ylab = "Proportion", pch = 16, cex = 0, col = "#99999999", axes = FALSE)
axis(side = 1, labels = seq(0, 1, .1), at = seq(0, 1, .1))
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
# Solid dark red: fit on wrong.race; dotted dark green: fit on wrong.race.rf.
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
lines(x = m.forest$x, y = m.forest$fitted, lwd = 2, col = "dark green", lty = 3)
box()

# ASIAN
# Mean misclassification indicator per rounded vote-propensity bin, for the
# baseline prediction (wrong.race) and the random-forest prediction
# (wrong.race.rf). NAs are dropped within each bin.
asian.wrong.race <- tapply(X = fl_asian$wrong.race, INDEX = fl_asian$vote_rounded,
                           function(x) mean(x, na.rm = TRUE))
asian.wrong.race.forest <- tapply(X = fl_asian$wrong.race.rf, INDEX = fl_asian$vote_rounded,
                                  function(x) mean(x, na.rm = TRUE))

# Number of fl_asian rows in each bin; used as the loess weights.
point.size <- tapply(X = fl_asian$wrong.race, INDEX = fl_asian$vote_rounded, length)

# names() of a tapply() result are the bin values as character strings;
# convert them to numeric explicitly rather than relying on loess() coercing
# a character predictor internally.
m <- loess(asian.wrong.race ~ as.numeric(names(asian.wrong.race)),
           weights = point.size, span = .6)
m.forest <- loess(asian.wrong.race.forest ~ as.numeric(names(asian.wrong.race.forest)),
                  weights = point.size, span = .6)

# Set up an empty canvas (cex = 0 hides the points, axes are drawn by hand);
# only the two smoothed curves are shown.
plot(as.numeric(names(asian.wrong.race)), asian.wrong.race,
     ylim = c(0, 1), xlim = c(0, 1),
     xlab = "Predicted Probability of Voting in 2016",
     main = "Proportion of Race Misclassification - Asian Voters",
     ylab = "Proportion", pch = 16, cex = 0, col = "#99999999", axes = FALSE)
axis(side = 1, labels = seq(0, 1, .1), at = seq(0, 1, .1))
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
# Solid dark red: fit on wrong.race; dotted dark green: fit on wrong.race.rf.
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
lines(x = m.forest$x, y = m.forest$fitted, lwd = 2, col = "dark green", lty = 3)
box()

# WHITE
# Mean misclassification indicator per rounded vote-propensity bin, for the
# baseline prediction (wrong.race) and the random-forest prediction
# (wrong.race.rf). NAs are dropped within each bin.
white.wrong.race <- tapply(X = fl_white$wrong.race, INDEX = fl_white$vote_rounded,
                           function(x) mean(x, na.rm = TRUE))
white.wrong.race.forest <- tapply(X = fl_white$wrong.race.rf, INDEX = fl_white$vote_rounded,
                                  function(x) mean(x, na.rm = TRUE))

# Number of fl_white rows in each bin; used as the loess weights.
point.size <- tapply(X = fl_white$wrong.race, INDEX = fl_white$vote_rounded, length)

# names() of a tapply() result are the bin values as character strings;
# convert them to numeric explicitly rather than relying on loess() coercing
# a character predictor internally.
m <- loess(white.wrong.race ~ as.numeric(names(white.wrong.race)),
           weights = point.size, span = .6)
m.forest <- loess(white.wrong.race.forest ~ as.numeric(names(white.wrong.race.forest)),
                  weights = point.size, span = .6)

# Set up an empty canvas (cex = 0 hides the points, axes are drawn by hand);
# only the two smoothed curves are shown.
plot(as.numeric(names(white.wrong.race)), white.wrong.race,
     ylim = c(0, 1), xlim = c(0, 1),
     xlab = "Predicted Probability of Voting in 2016",
     main = "Proportion of Race Misclassification - White Voters",
     ylab = "Proportion", pch = 16, cex = 0, col = "#99999999", axes = FALSE)
axis(side = 1, labels = seq(0, 1, .1), at = seq(0, 1, .1))
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
# Solid dark red: fit on wrong.race; dotted dark green: fit on wrong.race.rf.
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
lines(x = m.forest$x, y = m.forest$fitted, lwd = 2, col = "dark green", lty = 3)
box()


# Wrong race prediction vs. CAMPAIGN CONTRIBUTIONS, for both the baseline (wrong.race) and random-forest (wrong.race.rf) predictions
# BLACK
# Mean misclassification indicator per rounded log-donations bin, for the
# baseline prediction (wrong.race) and the random-forest prediction
# (wrong.race.rf). NAs are dropped within each bin.
black.wrong.race <- tapply(X = fl_black$wrong.race, INDEX = fl_black$log_donations_rounded,
                           function(x) mean(x, na.rm = TRUE))
black.wrong.race.forest <- tapply(X = fl_black$wrong.race.rf, INDEX = fl_black$log_donations_rounded,
                                  function(x) mean(x, na.rm = TRUE))

# Number of fl_black rows in each bin; used as the loess weights.
point.size <- tapply(X = fl_black$wrong.race, INDEX = fl_black$log_donations_rounded, length)

# names() of a tapply() result are the bin values as character strings;
# convert them to numeric explicitly rather than relying on loess() coercing
# a character predictor internally.
m <- loess(black.wrong.race ~ as.numeric(names(black.wrong.race)),
           weights = point.size, span = .6)
m.forest <- loess(black.wrong.race.forest ~ as.numeric(names(black.wrong.race.forest)),
                  weights = point.size, span = .6)

# Set up an empty canvas (cex = 0 hides the points, axes are drawn by hand).
# The baseline series is passed here, matching the other race sections; since
# the points are invisible and the limits are fixed this does not alter the
# figure.
plot(as.numeric(names(black.wrong.race)), black.wrong.race,
     ylim = c(0, 1), xlim = c(0, 5),
     xlab = "Campaign Contributions of Zip Code (logged $/capita)",
     main = "Proportion of Race Misclassification - Black Voters",
     ylab = "Proportion", pch = 16, cex = 0, col = "#99999999", axes = FALSE)
axis(side = 1, labels = seq(0, 5, 1), at = seq(0, 5, 1))
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
# Solid dark red: fit on wrong.race; dotted dark green: fit on wrong.race.rf.
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
lines(x = m.forest$x, y = m.forest$fitted, lwd = 2, col = "dark green", lty = 3)
box()
# Hand-placed curve labels (presumably "BISG" marks the wrong.race curve and
# "BISG + Random Forest" the wrong.race.rf curve -- positions are hard-coded).
text(2, .6, "BISG", pos = 4, cex = 2)
text(2, .15, "BISG +\n Random Forest", pos = 4, cex = 2)

# LATINO
# Mean misclassification indicator per rounded log-donations bin, for the
# baseline prediction (wrong.race) and the random-forest prediction
# (wrong.race.rf). NAs are dropped within each bin.
latino.wrong.race <- tapply(X = fl_latino$wrong.race, INDEX = fl_latino$log_donations_rounded,
                            function(x) mean(x, na.rm = TRUE))
latino.wrong.race.forest <- tapply(X = fl_latino$wrong.race.rf, INDEX = fl_latino$log_donations_rounded,
                                   function(x) mean(x, na.rm = TRUE))

# Number of fl_latino rows in each bin; used as the loess weights.
point.size <- tapply(X = fl_latino$wrong.race, INDEX = fl_latino$log_donations_rounded, length)

# names() of a tapply() result are the bin values as character strings;
# convert them to numeric explicitly rather than relying on loess() coercing
# a character predictor internally.
m <- loess(latino.wrong.race ~ as.numeric(names(latino.wrong.race)),
           weights = point.size, span = .6)
m.forest <- loess(latino.wrong.race.forest ~ as.numeric(names(latino.wrong.race.forest)),
                  weights = point.size, span = .6)

# Set up an empty canvas (cex = 0 hides the points, axes are drawn by hand);
# only the two smoothed curves are shown.
plot(as.numeric(names(latino.wrong.race)), latino.wrong.race,
     ylim = c(0, 1), xlim = c(0, 5),
     xlab = "Campaign Contributions of Zip Code (logged $/capita)",
     main = "Proportion of Race Misclassification - Latino Voters",
     ylab = "Proportion", pch = 16, cex = 0, col = "#99999999", axes = FALSE)
axis(side = 1, labels = seq(0, 5, 1), at = seq(0, 5, 1))
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
# Solid dark red: fit on wrong.race; dotted dark green: fit on wrong.race.rf.
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
lines(x = m.forest$x, y = m.forest$fitted, lwd = 2, col = "dark green", lty = 3)
box()

# ASIAN
# Mean misclassification indicator per rounded log-donations bin, for the
# baseline prediction (wrong.race) and the random-forest prediction
# (wrong.race.rf). NAs are dropped within each bin.
asian.wrong.race <- tapply(X = fl_asian$wrong.race, INDEX = fl_asian$log_donations_rounded,
                           function(x) mean(x, na.rm = TRUE))
asian.wrong.race.forest <- tapply(X = fl_asian$wrong.race.rf, INDEX = fl_asian$log_donations_rounded,
                                  function(x) mean(x, na.rm = TRUE))

# Number of fl_asian rows in each bin; used as the loess weights.
point.size <- tapply(X = fl_asian$wrong.race, INDEX = fl_asian$log_donations_rounded, length)

# names() of a tapply() result are the bin values as character strings;
# convert them to numeric explicitly rather than relying on loess() coercing
# a character predictor internally.
m <- loess(asian.wrong.race ~ as.numeric(names(asian.wrong.race)),
           weights = point.size, span = .6)
m.forest <- loess(asian.wrong.race.forest ~ as.numeric(names(asian.wrong.race.forest)),
                  weights = point.size, span = .6)

# Set up an empty canvas (cex = 0 hides the points, axes are drawn by hand);
# only the two smoothed curves are shown.
plot(as.numeric(names(asian.wrong.race)), asian.wrong.race,
     ylim = c(0, 1), xlim = c(0, 5),
     xlab = "Campaign Contributions of Zip Code (logged $/capita)",
     main = "Proportion of Race Misclassification - Asian Voters",
     ylab = "Proportion", pch = 16, cex = 0, col = "#99999999", axes = FALSE)
axis(side = 1, labels = seq(0, 5, 1), at = seq(0, 5, 1))
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
# Solid dark red: fit on wrong.race; dotted dark green: fit on wrong.race.rf.
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
lines(x = m.forest$x, y = m.forest$fitted, lwd = 2, col = "dark green", lty = 3)
box()

# WHITE
# Mean misclassification indicator per rounded log-donations bin, for the
# baseline prediction (wrong.race) and the random-forest prediction
# (wrong.race.rf). NAs are dropped within each bin.
white.wrong.race <- tapply(X = fl_white$wrong.race, INDEX = fl_white$log_donations_rounded,
                           function(x) mean(x, na.rm = TRUE))
white.wrong.race.forest <- tapply(X = fl_white$wrong.race.rf, INDEX = fl_white$log_donations_rounded,
                                  function(x) mean(x, na.rm = TRUE))

# Number of fl_white rows in each bin; used as the loess weights.
point.size <- tapply(X = fl_white$wrong.race, INDEX = fl_white$log_donations_rounded, length)

# names() of a tapply() result are the bin values as character strings;
# convert them to numeric explicitly rather than relying on loess() coercing
# a character predictor internally.
m <- loess(white.wrong.race ~ as.numeric(names(white.wrong.race)),
           weights = point.size, span = .6)
m.forest <- loess(white.wrong.race.forest ~ as.numeric(names(white.wrong.race.forest)),
                  weights = point.size, span = .6)

# Set up an empty canvas (cex = 0 hides the points, axes are drawn by hand);
# only the two smoothed curves are shown.
plot(as.numeric(names(white.wrong.race)), white.wrong.race,
     ylim = c(0, 1), xlim = c(0, 5),
     xlab = "Campaign Contributions of Zip Code (logged $/capita)",
     main = "Proportion of Race Misclassification - White Voters",
     ylab = "Proportion", pch = 16, cex = 0, col = "#99999999", axes = FALSE)
axis(side = 1, labels = seq(0, 5, 1), at = seq(0, 5, 1))
axis(side = 2, labels = seq(0, 1, .1), at = seq(0, 1, .1), las = 2)
# Solid dark red: fit on wrong.race; dotted dark green: fit on wrong.race.rf.
lines(x = m$x, y = m$fitted, lwd = 2, col = "dark red")
lines(x = m.forest$x, y = m.forest$fitted, lwd = 2, col = "dark green", lty = 3)
box()
